URLs http://rpubs.com/adam_dennett/443357
http://egallic.fr/R/sKDE/smooth-maps/kde.html
libraries
#install.packages("downloader")
library(tidyverse)
## Warning: package 'tidyverse' was built under R version 3.4.2
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.2.1 ──
## ✔ ggplot2 3.0.0.9000 ✔ purrr 0.2.5
## ✔ tibble 1.4.2 ✔ dplyr 0.7.6
## ✔ tidyr 0.8.1 ✔ stringr 1.3.1
## ✔ readr 1.1.1 ✔ forcats 0.3.0
## Warning: package 'tibble' was built under R version 3.4.3
## Warning: package 'tidyr' was built under R version 3.4.4
## Warning: package 'purrr' was built under R version 3.4.4
## Warning: package 'dplyr' was built under R version 3.4.4
## Warning: package 'stringr' was built under R version 3.4.4
## Warning: package 'forcats' was built under R version 3.4.3
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
library(downloader)
library(rgdal)
## Warning: package 'rgdal' was built under R version 3.4.4
## Loading required package: sp
## Warning: package 'sp' was built under R version 3.4.4
## rgdal: version: 1.3-4, (SVN revision 766)
## Geospatial Data Abstraction Library extensions to R successfully loaded
## Loaded GDAL runtime: GDAL 2.1.3, released 2017/20/01
## Path to GDAL shared files: /Library/Frameworks/R.framework/Versions/3.4/Resources/library/rgdal/gdal
## GDAL binary built with GEOS: FALSE
## Loaded PROJ.4 runtime: Rel. 4.9.3, 15 August 2016, [PJ_VERSION: 493]
## Path to PROJ.4 shared files: /Library/Frameworks/R.framework/Versions/3.4/Resources/library/rgdal/proj
## Linking to sp version: 1.3-1
library(sf)
## Warning: package 'sf' was built under R version 3.4.4
## Linking to GEOS 3.6.1, GDAL 2.1.3, proj.4 4.9.3
library(ggplot2)
library(reshape2)
## Warning: package 'reshape2' was built under R version 3.4.3
##
## Attaching package: 'reshape2'
## The following object is masked from 'package:tidyr':
##
## smiths
library(plotly)
##
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
##
## last_plot
## The following object is masked from 'package:stats':
##
## filter
## The following object is masked from 'package:graphics':
##
## layout
#library(highcharter)
library(ggthemes)
## Warning: package 'ggthemes' was built under R version 3.4.4
Get data
LondonWards <- readOGR("/Volumes/ucfnnap/CASA_GIS and Sc/wk8/NewLondonWard/NewLondonWard.shp", layer="NewLondonWard")
## OGR data source with driver: ESRI Shapefile
## Source: "/Volumes/ucfnnap/CASA_GIS and Sc/wk8/NewLondonWard/NewLondonWard.shp", layer: "NewLondonWard"
## with 625 features
## It has 76 fields
## Integer64 fields read as strings: x y
LondonWardsSF <- st_as_sf(LondonWards)
extradata <- read_csv("https://www.dropbox.com/s/qay9q1jwpffxcqj/LondonAdditionalDataFixed.csv?raw=1")
## Parsed with column specification:
## cols(
## WardName = col_character(),
## WardCode = col_character(),
## Wardcode = col_character(),
## PctSharedOwnership2011 = col_double(),
## PctRentFree2011 = col_double(),
## Candidate = col_character(),
## InnerOuter = col_character(),
## x = col_double(),
## y = col_double(),
## AvgGCSE2011 = col_double(),
## UnauthAbsenceSchools11 = col_double()
## )
Merge datasets
LondonWardsSF <- merge(LondonWardsSF, extradata, by.x = "WD11CD", by.y = "Wardcode")
summary(LondonWardsSF)
## WD11CD WD11CDO WD11NM WD11NMW
## E05000026: 1 00AA : 1 Village : 3 NA's:625
## E05000027: 1 00ABFX : 1 Abbey : 2
## E05000028: 1 00ABFY : 1 Alexandra : 2
## E05000029: 1 00ABFZ : 1 Barnhill : 2
## E05000030: 1 00ABGA : 1 Belmont : 2
## E05000031: 1 00ABGB : 1 Brunswick Park: 2
## (Other) :619 (Other):619 (Other) :612
## WardName.x WardCode.x Wardcode1
## Barking and Dagenham - Abbey : 1 00AA : 1 E05000026: 1
## Barking and Dagenham - Alibon : 1 00ABFX : 1 E05000027: 1
## Barking and Dagenham - Becontree : 1 00ABFY : 1 E05000028: 1
## Barking and Dagenham - Chadwell Heath: 1 00ABFZ : 1 E05000029: 1
## Barking and Dagenham - Eastbrook : 1 00ABGA : 1 E05000030: 1
## Barking and Dagenham - Eastbury : 1 00ABGB : 1 (Other) :619
## (Other) :619 (Other):619 NA's : 1
## PopCensus2 Aged0_15 Aged16_64 Aged65plus
## Min. : 5110 Min. : 620 Min. : 3056 Min. : 431
## 1st Qu.:11197 1st Qu.:2041 1st Qu.: 7536 1st Qu.:1104
## Median :12979 Median :2517 Median : 9024 Median :1338
## Mean :13078 Mean :2600 Mean : 9031 Mean :1448
## 3rd Qu.:14862 3rd Qu.:3084 3rd Qu.:10388 3rd Qu.:1667
## Max. :23084 Max. :5652 Max. :18688 Max. :3364
##
## PctAged0_1 PctAged16_ PctAged65p MeanAge201
## Min. : 7.134 Min. :58.38 Min. : 3.483 Min. :29.00
## 1st Qu.:17.523 1st Qu.:65.02 1st Qu.: 8.352 1st Qu.:33.60
## Median :19.535 Median :68.19 Median :10.661 Median :35.40
## Mean :19.688 Mean :68.96 Mean :11.348 Mean :35.82
## 3rd Qu.:21.933 3rd Qu.:72.54 3rd Qu.:13.739 3rd Qu.:37.90
## Max. :33.188 Max. :82.85 Max. :23.523 Max. :44.10
##
## MedianAge2 AreaSqKM PopDensity PctBame
## Min. :26.00 Min. : 0.400 Min. : 181 Min. : 4.10
## 1st Qu.:31.00 1st Qu.: 1.200 1st Qu.: 4500 1st Qu.:23.50
## Median :33.00 Median : 1.900 Median : 6600 Median :36.10
## Mean :34.26 Mean : 2.552 Mean : 7930 Mean :38.74
## 3rd Qu.:37.00 3rd Qu.: 2.900 3rd Qu.:10500 3rd Qu.:52.30
## Max. :46.00 Max. :29.000 Max. :27750 Max. :93.70
##
## PctNotBorn PctNoEngli GenFertRat MaleLE0509
## Min. : 5.10 Min. : 0.40 Min. : 21.79 Min. :71.22
## 1st Qu.:26.80 1st Qu.: 7.00 1st Qu.: 55.35 1st Qu.:75.91
## Median :37.30 Median :11.90 Median : 66.90 Median :78.03
## Mean :36.05 Mean :12.85 Mean : 68.79 Mean :78.38
## 3rd Qu.:45.80 3rd Qu.:18.10 3rd Qu.: 79.63 3rd Qu.:80.35
## Max. :68.50 Max. :36.70 Max. :130.20 Max. :98.06
##
## FemaleLE05 RateAmbula RatesAmbul InEmployme
## Min. :75.91 Min. : 68.7 Min. : 0.1373 Min. : 2443
## 1st Qu.:81.41 1st Qu.:103.8 1st Qu.: 0.4303 1st Qu.: 5461
## Median :83.28 Median :119.6 Median : 0.5851 Median : 6247
## Mean :83.60 Mean :131.8 Mean : 0.8593 Mean : 6398
## 3rd Qu.:85.41 3rd Qu.:143.4 3rd Qu.: 0.8543 3rd Qu.: 7216
## Max. :99.55 Max. :959.7 Max. :26.8356 Max. :13838
##
## Employment NoJobs2011 EmpWkAgePo RateNINoFo
## Min. :45.02 Min. : 600 Min. : 0.07639 Min. : 0.7246
## 1st Qu.:61.85 1st Qu.: 2100 1st Qu.: 0.25764 1st Qu.: 20.0000
## Median :65.95 Median : 3500 Median : 0.40203 Median : 38.8571
## Mean :65.46 Mean : 7091 Mean : 0.80134 Mean : 43.9887
## 3rd Qu.:69.24 3rd Qu.: 6000 3rd Qu.: 0.67914 3rd Qu.: 60.4520
## Max. :81.48 Max. :382700 Max. :50.02183 Max. :149.1803
##
## MeanHouseP NoProperti NoHousehol PctDetache
## Min. : 139270 Min. : 25 Min. : 2169 Min. : 0.300
## 1st Qu.: 238347 1st Qu.: 94 1st Qu.: 4570 1st Qu.: 2.200
## Median : 321992 Median :128 Median : 5335 Median : 4.300
## Mean : 415938 Mean :138 Mean : 5420 Mean : 6.598
## 3rd Qu.: 453378 3rd Qu.:168 3rd Qu.: 6178 3rd Qu.: 7.400
## Max. :4595285 Max. :474 Max. :12035 Max. :55.800
##
## PctSemiDet PctTerrace PctFlatMai PctOwned20
## Min. : 0.20 Min. : 1.40 Min. : 6.30 Min. :11.90
## 1st Qu.: 6.40 1st Qu.:13.30 1st Qu.:29.40 1st Qu.:34.30
## Median :16.90 Median :21.00 Median :46.10 Median :48.30
## Mean :19.95 Mean :23.65 Mean :49.72 Mean :49.67
## 3rd Qu.:30.00 3rd Qu.:32.60 3rd Qu.:71.40 3rd Qu.:64.40
## Max. :82.30 Max. :63.80 Max. :98.00 Max. :90.80
##
## PctSocialR PctPrivate PctSharedO PctRentFre
## Min. : 1.10 Min. : 4.80 Min. :0.04818 Min. :0.5202
## 1st Qu.:11.30 1st Qu.:17.30 1st Qu.:0.51520 1st Qu.:0.9385
## Median :20.40 Median :24.40 Median :0.95765 Median :1.1492
## Mean :23.33 Mean :24.45 Mean :1.22841 Mean :1.3102
## 3rd Qu.:34.00 3rd Qu.:31.60 3rd Qu.:1.61262 3rd Qu.:1.4391
## Max. :68.40 Max. :55.50 Max. :7.38835 Max. :9.9948
##
## PctCTaxBan PctCTaxB_1 PctCTaxB_2 MortgageRe
## Min. : 0.2654 Min. : 5.316 Min. : 0.000 Min. : 0.000
## 1st Qu.: 5.4676 1st Qu.:59.107 1st Qu.: 1.490 1st Qu.: 0.000
## Median :11.8540 Median :69.181 Median : 8.135 Median :10.000
## Mean :16.4961 Mean :68.873 Mean :15.255 Mean : 8.696
## 3rd Qu.:25.2222 3rd Qu.:80.993 3rd Qu.:24.458 3rd Qu.:15.000
## Max. :64.3534 Max. :98.020 Max. :95.713 Max. :70.000
##
## LandlordRe Incapacity IncomeSupp JSAClaiman
## Min. : 0.00 Min. : 0.2381 Min. : 0.1786 Min. : 0.4072
## 1st Qu.: 20.00 1st Qu.: 2.8571 1st Qu.: 2.9936 1st Qu.: 3.3068
## Median : 40.00 Median : 4.0796 Median : 4.7333 Median : 5.4305
## Mean : 43.22 Mean : 4.2071 Mean : 5.0141 Mean : 6.1117
## 3rd Qu.: 60.00 3rd Qu.: 5.3459 3rd Qu.: 6.7424 3rd Qu.: 8.3867
## Max. :195.00 Max. :12.6357 Max. :16.1147 Max. :23.4036
##
## JSAClaim_1 PctDepChil PctDepCh_1 PctHHNoAdu
## Min. : 0.00 Min. : 3.343 Min. : 2.736 Min. : 0.8666
## 1st Qu.: 6.45 1st Qu.:21.610 1st Qu.:16.694 1st Qu.: 3.1171
## Median :10.43 Median :31.522 Median :26.265 Median : 5.1414
## Mean :11.73 Mean :32.404 Mean :26.172 Mean : 5.5876
## 3rd Qu.:15.72 3rd Qu.:42.588 3rd Qu.:34.973 3rd Qu.: 7.4833
## Max. :48.67 Max. :63.041 Max. :55.172 Max. :15.5004
##
## PctLonePar IDRankLond IDPctWorst AvgGCSE201
## Min. :20.82 Min. : 1.0 Min. : 0.00 Min. :245.0
## 1st Qu.:40.91 1st Qu.:157.0 1st Qu.: 30.00 1st Qu.:332.3
## Median :46.67 Median :313.0 Median : 83.33 Median :343.7
## Mean :45.97 Mean :313.5 Mean : 65.55 Mean :345.8
## 3rd Qu.:51.36 3rd Qu.:470.0 3rd Qu.:100.00 3rd Qu.:358.3
## Max. :75.63 Max. :628.0 Max. :100.00 Max. :409.1
##
## UnauthAbse PctWithNoQ PctLev4Qua CrimeRate1
## Min. :0.2463 Min. : 3.80 Min. :12.50 Min. : 25.75
## 1st Qu.:0.8215 1st Qu.:13.50 1st Qu.:27.30 1st Qu.: 64.09
## Median :1.1364 Median :17.40 Median :35.50 Median : 84.83
## Mean :1.1286 Mean :17.62 Mean :37.66 Mean : 101.05
## 3rd Qu.:1.4105 3rd Qu.:21.60 3rd Qu.:47.00 3rd Qu.: 107.57
## Max. :2.4675 Max. :35.80 Max. :68.70 Max. :2100.20
##
## ViolenceRa RobberyRat TheftAndHa CriminalDa
## Min. : 3.163 Min. : 0.215 Min. : 0.00 Min. : 2.474
## 1st Qu.: 11.058 1st Qu.: 2.116 1st Qu.: 20.90 1st Qu.: 6.512
## Median : 16.878 Median : 3.833 Median : 29.61 Median : 8.358
## Mean : 18.737 Mean : 4.580 Mean : 41.69 Mean : 8.962
## 3rd Qu.: 23.082 3rd Qu.: 6.042 3rd Qu.: 40.74 3rd Qu.:10.783
## Max. :215.882 Max. :53.668 Max. :1486.73 Max. :42.613
##
## DrugsRate1 Deliberate PctOpenSpa CarsPerHH2
## Min. : 0.6987 Min. :0.0000 Min. : 0.00 Min. :0.2333
## 1st Qu.: 2.7723 1st Qu.:0.2000 1st Qu.:13.84 1st Qu.:0.5591
## Median : 5.0365 Median :0.4000 Median :23.51 Median :0.8184
## Mean : 7.4460 Mean :0.5402 Mean :27.13 Mean :0.8426
## 3rd Qu.: 8.5567 3rd Qu.:0.7000 3rd Qu.:36.85 3rd Qu.:1.1088
## Max. :190.9804 Max. :3.5000 Max. :88.80 Max. :1.7051
##
## AvgPubTran TurnoutMay ID x.x y.x
## Min. :1.250 Min. :19.30 Min. : 0 NA's:625 NA's:625
## 1st Qu.:2.584 1st Qu.:30.90 1st Qu.:156
## Median :3.221 Median :34.22 Median :312
## Mean :3.639 Mean :34.14 Mean :312
## 3rd Qu.:4.377 3rd Qu.:37.64 3rd Qu.:468
## Max. :7.950 Max. :51.72 Max. :624
##
## WardName.y WardCode.y PctSharedOwnership2011
## Length:625 Length:625 Min. :0.04818
## Class :character Class :character 1st Qu.:0.51520
## Mode :character Mode :character Median :0.95765
## Mean :1.22841
## 3rd Qu.:1.61262
## Max. :7.38835
##
## PctRentFree2011 Candidate InnerOuter x.y
## Min. :0.5202 Length:625 Length:625 Min. :505213
## 1st Qu.:0.9385 Class :character Class :character 1st Qu.:523226
## Median :1.1492 Mode :character Mode :character Median :530429
## Mean :1.3102 Mean :530354
## 3rd Qu.:1.4391 3rd Qu.:537695
## Max. :9.9948 Max. :557694
##
## y.y AvgGCSE2011 UnauthAbsenceSchools11
## Min. :157876 Min. :245.0 Min. :0.2463
## 1st Qu.:174242 1st Qu.:332.3 1st Qu.:0.8215
## Median :180932 Median :343.7 Median :1.1364
## Mean :180131 Mean :345.8 Mean :1.1286
## 3rd Qu.:186503 3rd Qu.:358.3 3rd Qu.:1.4105
## Max. :199314 Max. :409.1 Max. :2.4675
##
## geometry
## MULTIPOLYGON :625
## epsg:NA : 0
## +proj=tmer...: 0
##
##
##
##
names(LondonWardsSF)
## [1] "WD11CD" "WD11CDO"
## [3] "WD11NM" "WD11NMW"
## [5] "WardName.x" "WardCode.x"
## [7] "Wardcode1" "PopCensus2"
## [9] "Aged0_15" "Aged16_64"
## [11] "Aged65plus" "PctAged0_1"
## [13] "PctAged16_" "PctAged65p"
## [15] "MeanAge201" "MedianAge2"
## [17] "AreaSqKM" "PopDensity"
## [19] "PctBame" "PctNotBorn"
## [21] "PctNoEngli" "GenFertRat"
## [23] "MaleLE0509" "FemaleLE05"
## [25] "RateAmbula" "RatesAmbul"
## [27] "InEmployme" "Employment"
## [29] "NoJobs2011" "EmpWkAgePo"
## [31] "RateNINoFo" "MeanHouseP"
## [33] "NoProperti" "NoHousehol"
## [35] "PctDetache" "PctSemiDet"
## [37] "PctTerrace" "PctFlatMai"
## [39] "PctOwned20" "PctSocialR"
## [41] "PctPrivate" "PctSharedO"
## [43] "PctRentFre" "PctCTaxBan"
## [45] "PctCTaxB_1" "PctCTaxB_2"
## [47] "MortgageRe" "LandlordRe"
## [49] "Incapacity" "IncomeSupp"
## [51] "JSAClaiman" "JSAClaim_1"
## [53] "PctDepChil" "PctDepCh_1"
## [55] "PctHHNoAdu" "PctLonePar"
## [57] "IDRankLond" "IDPctWorst"
## [59] "AvgGCSE201" "UnauthAbse"
## [61] "PctWithNoQ" "PctLev4Qua"
## [63] "CrimeRate1" "ViolenceRa"
## [65] "RobberyRat" "TheftAndHa"
## [67] "CriminalDa" "DrugsRate1"
## [69] "Deliberate" "PctOpenSpa"
## [71] "CarsPerHH2" "AvgPubTran"
## [73] "TurnoutMay" "ID"
## [75] "x.x" "y.x"
## [77] "WardName.y" "WardCode.y"
## [79] "PctSharedOwnership2011" "PctRentFree2011"
## [81] "Candidate" "InnerOuter"
## [83] "x.y" "y.y"
## [85] "AvgGCSE2011" "UnauthAbsenceSchools11"
## [87] "geometry"
London data store for data key=value reference https://londondatastore-upload.s3.amazonaws.com/instant-atlas/borough-profiles/atlas.html
Simple histogram
### style
th <- theme_tufte(base_family = "Georgia")
### AvgPubTran = Average Public Transport Accessibility score, 2014
ggplot(LondonWardsSF, aes(AvgPubTran)) +
geom_histogram() + th
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
Simple histogram binwidth = 0.1
ggplot(LondonWardsSF, aes(AvgPubTran)) +
geom_histogram(binwidth = 0.1) + th
Histogram with vertical lines marking central tendency (mean and median)
CT <- ggplot(LondonWardsSF, aes(AvgPubTran)) +
geom_histogram(binwidth = 0.1) + th +
geom_vline(aes(xintercept = mean(AvgPubTran)), colour="yellow") +
geom_vline(aes(xintercept = median(AvgPubTran)), colour="magenta")
Calculate Mode
### PTAL scores are rounded to one decimal place so that values repeat;
### on the raw scores every value would be unique and no mode would exist
x <- round(LondonWardsSF$AvgPubTran, digits = 1)
### frequency of each rounded score
y <- table(x)
### label(s) of the most frequent rounded score (all ties are returned)
names(y)[y == max(y)]
## [1] "2.4"
Histogram with Mode
CT + geom_vline(aes(xintercept = 2.4), colour="cyan")
Annotate Histogram
m_ean <- annotate("text", label = paste("Mean\n", round(mean(LondonWardsSF$AvgPubTran), digits = 2)), x = mean(LondonWardsSF$AvgPubTran), y = 7, color = "white", size = 3, family = "Georgia", hjust =-.01)
m_edian <- annotate("text", label = paste("Median\n", round(median(LondonWardsSF$AvgPubTran), digits = 2)), x = median(LondonWardsSF$AvgPubTran), y = 3, color = "white", size = 3, family = "Georgia", hjust =-.01)
m_ode <- annotate("text", label = paste("Mode: ", 2.4), x = 2.4, y = 3, color = "white", size = 3, family = "Georgia", angle = 90, vjust =-.5)
CT + geom_vline(aes(xintercept = 2.4), colour="cyan") + m_ean + m_edian + m_ode
Kernel density smoothed frequency gradients
ggplot(LondonWardsSF, aes(x=AvgPubTran, y=..density..)) + geom_histogram(aes(y = ..density..))+
geom_density(colour="red", fill="red", alpha=0.2) +
geom_density(colour="orange", fill="orange", alpha=0.2, adjust = 1/5) +
geom_density(colour="purple", fill="purple", alpha=0.2, adjust = 1/3) + th
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
Different Kernels
ggplot(LondonWardsSF, aes(x=AvgPubTran, y=..density..)) + geom_histogram(aes(y = ..density..))+
geom_density(colour="red", fill="red", alpha=0.2, adjust = 1/3, kernel="gaussian") +
geom_density(colour="orange", fill="orange", alpha=0.2, adjust = 1/3, kernel="rectangular") +
geom_density(colour="purple", fill="purple", alpha=0.2, adjust = 1/3, kernel="cosine") + th
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
Boxplot
bp <- ggplot(LondonWardsSF, aes(1, AvgPubTran)) + theme_tufte() +
geom_tufteboxplot()
bp
Annotate Boxplot
fn <- fivenum(LondonWardsSF$AvgPubTran)
an <- annotate(geom="text", label=round(fn ,digits=2), x= 1, y=fn, size = 3, family = "Georgia", hjust =-.2)
bp + an + theme(axis.title=element_blank(),axis.ticks.x = element_blank(), axis.text.x=element_blank())
Faceted grid
#check which variables are numeric first: list1 holds the class of every
#column of LondonWardsSF, with a running column number appended alongside
list1 <- as.data.frame(cbind(lapply(LondonWardsSF, class)))
list1 <- cbind(list1, seq.int(nrow(list1)))
#you will notice that there are some non-numeric columns, we want to exclude these:
#keep columns 1-73 and 83-86, i.e. skip 74-82 (ID, x.x, y.x, WardName.y, WardCode.y,
#PctSharedOwnership2011, PctRentFree2011, Candidate, InnerOuter)
#NOTE(review): the original comment also said this drops the geometry, but sf normally
#keeps the geometry column when subsetting with `[` - confirm whether LondonSub should
#go through st_set_geometry(..., NULL) like the subsets below
LondonSub <- LondonWardsSF[,c(1:73,83:86)]
#make sure the geometry is null or we will get errors - also create some subsets so that we can see our data better
#each subset keeps the three identifier columns (1:3 = WD11CD, WD11CDO, WD11NM) plus one slice of variables
#columns 9:27 = Aged0_15 ... InEmployme
LondonSub2 <- st_set_geometry(LondonWardsSF[,c(1:3,9:27)],NULL)
#columns 28:50 = Employment ... IncomeSupp
LondonSub3 <- st_set_geometry(LondonWardsSF[,c(1:3,28:50)],NULL)
#columns 51:73 = JSAClaiman ... TurnoutMay, plus 85:86 = AvgGCSE2011, UnauthAbsenceSchools11
LondonSub4 <- st_set_geometry(LondonWardsSF[,c(1:3,51:73,85:86)],NULL)
# Reshape to long format: the three identifier columns are kept as ids and
# every other column is stacked into a variable / value pair
LondonMelt2 <- melt(LondonSub2, id.vars = 1:3)
# No attach() here: ggplot() receives the data frame explicitly, and attaching
# it would only leave its columns on the search path for the rest of the session
# Density-scaled histogram of value with a kernel density curve drawn on top
hist2 <- ggplot(LondonMelt2, aes(x=value)) + geom_histogram(aes(y = ..density..)) + geom_density(colour="cyan", size=1, adjust=1) + theme_tufte()
# One panel per variable, each with its own x and y scales
hist2 + facet_wrap(~ variable, scales="free")
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
# Reshape to long format: the three identifier columns are kept as ids and
# every other column is stacked into a variable / value pair
LondonMelt3 <- melt(LondonSub3, id.vars = 1:3)
# No attach() here: ggplot() receives the data frame explicitly, and attaching
# it only masked the identically named columns of previously attached frames
# Density-scaled histogram of value with a kernel density curve drawn on top
hist3 <- ggplot(LondonMelt3, aes(x=value)) + geom_histogram(aes(y = ..density..)) + geom_density(colour="cyan", size=1, adjust=1) + theme_tufte()
# One panel per variable, each with its own x and y scales
hist3 + facet_wrap(~ variable, scales="free")
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
# Reshape to long format: the three identifier columns are kept as ids and
# every other column is stacked into a variable / value pair
LondonMelt4 <- melt(LondonSub4, id.vars = 1:3)
# No attach() here: ggplot() receives the data frame explicitly, and attaching
# it only masked the identically named columns of previously attached frames
# Density-scaled histogram of value with a (slightly thinner) kernel density curve
hist4 <- ggplot(LondonMelt4, aes(x=value)) + geom_histogram(aes(y = ..density..)) + geom_density(colour="cyan", size=.7, adjust=1) + theme_tufte()
# One panel per variable, each with its own x and y scales
hist4 + facet_wrap(~ variable, scales="free")
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
Log10() transformation on the x variables
# Same faceted histograms as hist4, but with value log10-transformed on the x axis.
# log10() of zero is -Inf and of a negative number is NaN; ggplot drops those rows,
# which is what the "Removed ... non-finite values" warning reports.
# NOTE(review): stat_function(fun=dnorm) is called without mean/sd, so the cyan curve
# is the standard normal N(0, 1) in every panel rather than a normal fitted to each
# variable - confirm this is the intended reference curve.
hist5 <- ggplot(LondonMelt4, aes(x=log10(value))) + geom_histogram(aes(y = ..density..)) + stat_function(fun=dnorm, colour="cyan", size=0.5) + theme_tufte()
# One panel per variable, each with its own x and y scales
hist5 + facet_wrap(~ variable, scales="free")
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## Warning: Removed 90 rows containing non-finite values (stat_bin).
spatial Kernel Density Estimate
londonpoint<-ggplot(LondonSub, aes(x=x.y,y=y.y))+geom_point()+coord_equal()+theme_tufte()
londonpoint
library(viridis)
## Warning: package 'viridis' was built under R version 3.4.4
## Loading required package: viridisLite
## Warning: package 'viridisLite' was built under R version 3.4.3
londonpoint<-ggplot(LondonSub, aes(x=x.y,y=y.y))+stat_bin2d(bins=10)+theme_tufte()+scale_fill_viridis()
londonpoint
londonpoint<-ggplot(LondonSub, aes(x=x.y,y=y.y))+geom_point()+coord_equal()+theme_tufte()
londonpoint
londonpoint+stat_density2d(aes(fill = ..level..), geom="polygon")+theme_tufte()+scale_fill_viridis()
Introduction to functions in R
Structure of a function
# Template only: shows the general shape of a function definition -
# a name, an argument list, a body and a returned value
myfunction <- function(arg1, arg2, ... ){
# `statements` is a placeholder for the code that does the work
statements
# `object` is a placeholder for the value handed back to the caller
return(object)
}
Re-classify data (recode data)
# recode(): classify a numeric vector into "High" / "Medium" / "Low".
#
# Arguments:
#   variable - numeric vector to classify
#   high     - inclusive upper bound of the "High" class
#   medium   - inclusive upper bound of the "Medium" class
#   low      - inclusive upper bound of the "Low" class
#              (the bounds are expected to satisfy low <= medium <= high)
#
# Returns a character vector of the same length as `variable`:
#   "Low"    where variable <= low
#   "Medium" where low < variable <= medium
#   "High"   where medium < variable <= high
#   NA       where variable is above `high` or is itself NA
#
# The result vector is created inside the function, so recode() no longer
# depends on a global `newvar` having been initialised beforehand, and an
# unclassified first element is NA rather than a left-over "0".
# Note that once defined, this function masks dplyr::recode().
recode <- function(variable, high, medium, low) {
  newvar <- rep(NA_character_, length(variable))
  # Assign from the widest class to the narrowest: each later assignment
  # overwrites the part of the previous class that lies below its bound
  newvar[variable <= high] <- "High"
  newvar[variable <= medium] <- "Medium"
  newvar[variable <= low] <- "Low"
  newvar
}
# We define a new function called recode. It takes in 4 pieces of information: a variable (called variable, but it could have been called anything) and three values called high, medium and low. It returns a new character vector whose values are based on the high, medium and low thresholds that are given to the function.
We can now use this function to recode any of our continuous variables into high, medium and low values based on the values we enter into the function.
summary(LondonWardsSF$AvgGCSE201)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 245.0 332.3 343.7 345.8 358.3 409.1
# Take the class boundaries from the data itself rather than typing in the
# figures printed by summary(): summary() rounds to 4 significant digits, so
# a hand-copied maximum or quartile can fall just below the true value and
# leave the wards at those boundaries in the wrong class (or unclassified).
gcse <- LondonWardsSF$AvgGCSE201
# 1st and 3rd quartiles, computed the same way summary() computes them
gcse_quartiles <- quantile(gcse, probs = c(0.25, 0.75), na.rm = TRUE, names = FALSE)
# High = above the 3rd quartile, Medium = between the quartiles, Low = up to the 1st quartile
LondonWardsSF$GCSE_recode <- recode(gcse, max(gcse, na.rm = TRUE), gcse_quartiles[2], gcse_quartiles[1])
Location Quotient
# Location Quotient function 1
# Takes a vector that is already expressed as percentages (or rates) and
# divides every element by the mean of the whole vector, so a result of 1
# means "equal to the average of the areas supplied".
LQ1 <- function(pctVariable) {
  overall_mean <- mean(pctVariable)
  pctVariable / overall_mean
}
# Location Quotient function 2
# Works from raw counts instead of percentages.
#   variable - count of the group of interest in each area
#   rowtotal - total count in each area
# Each area's share (variable / rowtotal) is divided by the share across
# all areas combined (sum(variable) / sum(rowtotal)).
LQ2 <- function(variable, rowtotal) {
  (variable / rowtotal) / (sum(variable) / sum(rowtotal))
}
Calculate Location Quotients for the 5 Housing tenure variables (Owner Occupied, Private Rent, Social Rent, Shared Ownership, Rent Free)
names(LondonWardsSF)
## [1] "WD11CD" "WD11CDO"
## [3] "WD11NM" "WD11NMW"
## [5] "WardName.x" "WardCode.x"
## [7] "Wardcode1" "PopCensus2"
## [9] "Aged0_15" "Aged16_64"
## [11] "Aged65plus" "PctAged0_1"
## [13] "PctAged16_" "PctAged65p"
## [15] "MeanAge201" "MedianAge2"
## [17] "AreaSqKM" "PopDensity"
## [19] "PctBame" "PctNotBorn"
## [21] "PctNoEngli" "GenFertRat"
## [23] "MaleLE0509" "FemaleLE05"
## [25] "RateAmbula" "RatesAmbul"
## [27] "InEmployme" "Employment"
## [29] "NoJobs2011" "EmpWkAgePo"
## [31] "RateNINoFo" "MeanHouseP"
## [33] "NoProperti" "NoHousehol"
## [35] "PctDetache" "PctSemiDet"
## [37] "PctTerrace" "PctFlatMai"
## [39] "PctOwned20" "PctSocialR"
## [41] "PctPrivate" "PctSharedO"
## [43] "PctRentFre" "PctCTaxBan"
## [45] "PctCTaxB_1" "PctCTaxB_2"
## [47] "MortgageRe" "LandlordRe"
## [49] "Incapacity" "IncomeSupp"
## [51] "JSAClaiman" "JSAClaim_1"
## [53] "PctDepChil" "PctDepCh_1"
## [55] "PctHHNoAdu" "PctLonePar"
## [57] "IDRankLond" "IDPctWorst"
## [59] "AvgGCSE201" "UnauthAbse"
## [61] "PctWithNoQ" "PctLev4Qua"
## [63] "CrimeRate1" "ViolenceRa"
## [65] "RobberyRat" "TheftAndHa"
## [67] "CriminalDa" "DrugsRate1"
## [69] "Deliberate" "PctOpenSpa"
## [71] "CarsPerHH2" "AvgPubTran"
## [73] "TurnoutMay" "ID"
## [75] "x.x" "y.x"
## [77] "WardName.y" "WardCode.y"
## [79] "PctSharedOwnership2011" "PctRentFree2011"
## [81] "Candidate" "InnerOuter"
## [83] "x.y" "y.y"
## [85] "AvgGCSE2011" "UnauthAbsenceSchools11"
## [87] "geometry" "GCSE_recode"
# Location quotients for the five housing-tenure percentage columns; each new
# column is the ward's percentage divided by the mean percentage over all wards
# owner occupied
LondonWardsSF$Owner_occ <- LQ1(LondonWardsSF[["PctOwned20"]])
# private rent
LondonWardsSF$PRent <- LQ1(LondonWardsSF[["PctPrivate"]])
# social rent
LondonWardsSF$SRent <- LQ1(LondonWardsSF[["PctSocialR"]])
# shared ownership
LondonWardsSF$PShared <- LQ1(LondonWardsSF[["PctSharedO"]])
# rent free
LondonWardsSF$RFree <- LQ1(LondonWardsSF[["PctRentFre"]])
Location Quotient Mapper function by A.Dennett
#############################################################
##A Function for creating various location quotient maps
##
##By Adam Dennett October 2014 - updated November 2018
##
##Please note, this function requires input data to already be in
##the form of row percentages. To create the function, highlight the
##whole block of code and run it. To run the function, simply use
##LQMapper(your_dataframe)
library(rgeos)
## Warning: package 'rgeos' was built under R version 3.4.4
## rgeos version: 0.3-28, (SVN revision 572)
## GEOS runtime version: 3.6.1-CAPI-1.10.1 r0
## Linking to sp version: 1.2-7
## Polygon checking: TRUE
library(ggplot2)
library(maptools)
## Warning: package 'maptools' was built under R version 3.4.4
## Checking rgeos availability: TRUE
library(sf)
library(tmap)
## Warning: package 'tmap' was built under R version 3.4.4
sfdataframe <- LondonWardsSF
# LQMapper(): interactively compute location quotients and map them.
#
# Argument:
#   sfdataframe - an sf data frame whose columns of interest are already
#                 expressed as row percentages
#
# What it does:
#   1. prints the column names and asks (via readline) for a space-separated
#      list of columns
#   2. for each chosen column X adds a column "LQ_X" = X / mean(X)
#   3. for each new column draws a tmap choropleth (jenks breaks, "Spectral"
#      palette centred on 1) and saves it as "LQ_X.png" in the working directory
#
# Returns the input data frame with the LQ_ columns added.
# Stops with an error if nothing is selected or if a selected name is not a
# column of sfdataframe.
LQMapper <- function(sfdataframe) {
  print(colnames(sfdataframe))
  vars <- readline("From the list above, select the variables
you want to calculate location quotients for
separated by spaces...")

  # Split on runs of whitespace and drop empty tokens, so leading, trailing
  # or repeated spaces do not turn into "" variable names
  vars <- unlist(strsplit(trimws(vars), split = "\\s+"))
  vars <- vars[nzchar(vars)]
  if (length(vars) == 0) {
    stop("No variables were selected.", call. = FALSE)
  }

  # Fail early, naming the offending entries, instead of erroring deep inside
  # the loop on a column that does not exist
  unknown <- setdiff(vars, colnames(sfdataframe))
  if (length(unknown) > 0) {
    stop("Not a column of the supplied data frame: ",
         paste(unknown, collapse = ", "), call. = FALSE)
  }

  # this is a little function to calculate location quotients
  # (defined once, rather than on every pass through the loop)
  LQ <- function(pctVariable) {
    pctVariable / mean(pctVariable)
  }

  print("looping to create new location quotient variables...")
  # The data frame is indexed directly; it is deliberately not attach()ed,
  # which would leave its columns on the search path after every call
  for (var in vars) {
    sfdataframe[[paste0("LQ_", var)]] <- LQ(sfdataframe[[var]])
  }

  print("now entering the plotting loop")
  for (var in vars) {
    print("I'm plotting")
    pctVariable <- paste0("LQ_", var)
    # create the plot
    LQMapperPlot <- tm_shape(sfdataframe) + tm_polygons(pctVariable,
                                                        style = "jenks",
                                                        palette = "Spectral",
                                                        midpoint = 1,
                                                        title = pctVariable,
                                                        alpha = 0.5)
    # objects are not auto-printed inside a function or a loop, so the map
    # has to be printed explicitly to be displayed
    print(LQMapperPlot)
    # save the plot to a png and give it a name based on its variable
    tmap_save(LQMapperPlot, filename = paste0(pctVariable, ".png"))
  }
  return(sfdataframe)
}
###################################################################
#LQMapper(LondonWardsSF)
tm_shape(LondonWardsSF) +
tm_polygons("SRent",
style="jenks",
palette="PuRd",
midpoint=1,
title="PctSocialR",
border.col="white",
border.alpha = 0.01)
Basic Geodemographic Classification
In a cluster analysis, you should select variables that are:
* Ranged on the same scale
* Normally distributed
* Not highly correlated
LondonWardsDF <- st_set_geometry(LondonWardsSF, NULL)
#display list and class of variables
cbind(lapply(LondonWardsDF, class))
## [,1]
## WD11CD "factor"
## WD11CDO "factor"
## WD11NM "factor"
## WD11NMW "factor"
## WardName.x "factor"
## WardCode.x "factor"
## Wardcode1 "factor"
## PopCensus2 "numeric"
## Aged0_15 "numeric"
## Aged16_64 "numeric"
## Aged65plus "numeric"
## PctAged0_1 "numeric"
## PctAged16_ "numeric"
## PctAged65p "numeric"
## MeanAge201 "numeric"
## MedianAge2 "numeric"
## AreaSqKM "numeric"
## PopDensity "numeric"
## PctBame "numeric"
## PctNotBorn "numeric"
## PctNoEngli "numeric"
## GenFertRat "numeric"
## MaleLE0509 "numeric"
## FemaleLE05 "numeric"
## RateAmbula "numeric"
## RatesAmbul "numeric"
## InEmployme "numeric"
## Employment "numeric"
## NoJobs2011 "numeric"
## EmpWkAgePo "numeric"
## RateNINoFo "numeric"
## MeanHouseP "numeric"
## NoProperti "numeric"
## NoHousehol "numeric"
## PctDetache "numeric"
## PctSemiDet "numeric"
## PctTerrace "numeric"
## PctFlatMai "numeric"
## PctOwned20 "numeric"
## PctSocialR "numeric"
## PctPrivate "numeric"
## PctSharedO "numeric"
## PctRentFre "numeric"
## PctCTaxBan "numeric"
## PctCTaxB_1 "numeric"
## PctCTaxB_2 "numeric"
## MortgageRe "numeric"
## LandlordRe "numeric"
## Incapacity "numeric"
## IncomeSupp "numeric"
## JSAClaiman "numeric"
## JSAClaim_1 "numeric"
## PctDepChil "numeric"
## PctDepCh_1 "numeric"
## PctHHNoAdu "numeric"
## PctLonePar "numeric"
## IDRankLond "numeric"
## IDPctWorst "numeric"
## AvgGCSE201 "numeric"
## UnauthAbse "numeric"
## PctWithNoQ "numeric"
## PctLev4Qua "numeric"
## CrimeRate1 "numeric"
## ViolenceRa "numeric"
## RobberyRat "numeric"
## TheftAndHa "numeric"
## CriminalDa "numeric"
## DrugsRate1 "numeric"
## Deliberate "numeric"
## PctOpenSpa "numeric"
## CarsPerHH2 "numeric"
## AvgPubTran "numeric"
## TurnoutMay "numeric"
## ID "integer"
## x.x "factor"
## y.x "factor"
## WardName.y "character"
## WardCode.y "character"
## PctSharedOwnership2011 "numeric"
## PctRentFree2011 "numeric"
## Candidate "character"
## InnerOuter "character"
## x.y "numeric"
## y.y "numeric"
## AvgGCSE2011 "numeric"
## UnauthAbsenceSchools11 "numeric"
## GCSE_recode "character"
## Owner_occ "numeric"
## PRent "numeric"
## SRent "numeric"
## PShared "numeric"
## RFree "numeric"
Create new Dataframe with 2 variables
GD<-as.data.frame(LondonWardsDF[,c("CarsPerHH2","PctAged65p")])
mean(LondonWardsDF$CarsPerHH2)
## [1] 0.8426429
median(LondonWardsDF$CarsPerHH2)
## [1] 0.818353
mean(LondonWardsDF$PctAged65p)
## [1] 11.34829
median(LondonWardsDF$PctAged65p)
## [1] 10.66135
# kmeans() chooses its starting centres at random; fixing the seed makes the
# cluster numbering (and membership) reproducible from one run to the next
set.seed(42)
# run a k-means to find 3 clusters – nstart=25 tries 25 random sets of starting
# centres and keeps the best solution (it is not the number of iterations)
# NOTE(review): CarsPerHH2 (about 0.2 to 1.7) and PctAged65p (about 3.5 to 23.5)
# are on very different scales, so PctAged65p dominates the distances; consider
# clustering on scale(GD) to follow the "ranged on the same scale" advice - confirm
fit <- kmeans(GD, 3, nstart=25) # 3 cluster solution
# get cluster means
centroid<-aggregate(GD,by=list(fit$cluster),FUN=mean)
#print the results of the cluster groupings
centroid
# as we only have two variables (two dimensions) we can plot the clusters on a graph
p <- ggplot(GD,aes(CarsPerHH2, PctAged65p))
# points coloured by cluster, with the cluster means overlaid as large diamonds
p+geom_point(aes(colour=factor(fit$cluster)))+geom_point(data=centroid[,2:3],aes(CarsPerHH2, PctAged65p), size=7, shape=18)+ theme(legend.position="none")
GD$cluster <- fit$cluster
#add the cluster groups to the LondonWards data frame
LondonWardsSF$cluster<-GD$cluster
#now map our geodemographic classification
# cluster ids are labels, not quantities: map them as a factor on a discrete
# palette instead of a continuous gradient running from 1 to 3
map <- ggplot(LondonWardsSF) + geom_sf(mapping = aes(fill=factor(cluster)))+theme_tufte()+scale_fill_viridis(discrete = TRUE, name = "cluster")
map